package com.tdu.spider.biz.service.zhihu;

import com.alibaba.fastjson.JSON;
import com.google.common.collect.Lists;
import com.tdu.spider.biz.service.zhihu.vo.*;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

import java.nio.charset.Charset;
import java.util.List;

/**
 * Fetches pages and API responses from zhihu.com and converts them into the
 * project's value objects.
 *
 * <p>NOTE(review): the authorization token, x-udid and cookie below are hard-coded
 * session credentials. They should be moved to external configuration and rotated;
 * they are kept here unchanged so that behavior is preserved.
 */
@Service
public class HtmlExtractService {
    private static final Logger logger = LoggerFactory.getLogger(HtmlExtractService.class);

    /** Charset used to decode every response body. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Maximum number of response characters written to the log. */
    private static final int LOG_PREVIEW_LENGTH = 400;

    private static final String QUESTION_URL_PREFIX = "https://www.zhihu.com/question/";

    private static final String USER_AGENT =
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30";

    private static final String COOKIE =
        "q_c1=79860370fc544b1c9023b0207bd634fc|1519695007000|1490363301000; _xsrf=0cae89db-fe6c-465e-9064-d43bb45b49b3; __utma=51854390.1228250405.1504015222.1517224471.1519695002.12; __utmb=51854390.0.10.1519695002; __utmc=51854390; __utmv=51854390.000--|2=registration_date=20170226=1^3=entry_date=20170324=1; __utmz=51854390.1519695002.12.12.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; cap_id=\"OWVlZGQzNGZmNzU5NDljNzhhZDA0MDM0M2Q3ODYxYmM=|1519694997|2a3fcea28c39ccfe0c9e97f75ceda9a577c0c581\"; l_cap_id=\"YmM4YjNkZTJiNzg4NDI5YmE4YWM3YWQ5YjBkM2Y2NTQ=|1519694997|3c1f3b55014a3aeec2bc69b92f2e96c2644ba59d\"; r_cap_id=\"ZGFjZDc0MjU1ZjViNDI2NWFjZmI2M2I2YTlhZWY2YzA=|1519694997|e9a23d3ebd7328f78381a08d43a52c6f348d7946\"; aliyungf_tc=AQAAAARm4SxaagQAyw+dt3egLQVZ1yxC; z_c0=\"2|1:0|10:1512267162|4:z_c0|92:Mi4xRDBVN0JBQUFBQUFBTU1MUjdHeVhDeVlBQUFCZ0FsVk5tcXNRV3dDaHVxRVcwdXlXZTQ3ckk0Nk5aM29Kblg1VE5B|cadf04e8c038a723eb1bdd6408aa65490a64a22966c54516c2da441069412d4a\"; q_c1=79860370fc544b1c9023b0207bd634fc|1507787531000|1490363301000; _zap=c10890a3-4a5c-4bef-947e-0ce42f682bf9; d_c0=\"ADDC0exslwuPTjL3byJa7WJvQ-ncdRZBpso=|1491920553\"";

    /**
     * Returns the topics known to this service.
     *
     * @return a new mutable list holding the single hard-coded topic (id 19552249)
     */
    public List<TopicVO> queryAllTopic() {
        List<TopicVO> list = Lists.newArrayList();
        TopicVO topicVO = new TopicVO();
        topicVO.setId(19552249L);
        topicVO.setName("饮食");
        list.add(topicVO);
        return list;
    }

    /**
     * Downloads the "top-answers" page of a topic and extracts one {@link QuestionVO}
     * per {@code .feed-item} element.
     *
     * <p>Feed items whose question link does not end in a numeric id are skipped with a
     * warning instead of aborting the whole page. A missing or non-numeric
     * {@code data-votecount} attribute is reported as 0.
     *
     * @param answerQueryVO query carrying the topic id and the page number that is echoed
     *                      back in the result; its question id is used for the Referer header
     * @return the extracted questions together with the requested page
     * @throws Exception if the HTTP request fails
     */
    public QuestionResultVO queryQuestionrByTopic(AnswerQueryVO answerQueryVO) throws Exception {
        String url = "https://www.zhihu.com/topic/" + answerQueryVO.getTopicId()
                     + "/top-answers";
        String content = fetch(url, QUESTION_URL_PREFIX + answerQueryVO.getQuestionId(),
            answerQueryVO);

        Document document = Jsoup.parse(content);
        List<QuestionVO> data = Lists.newArrayList();
        for (Element element : document.select(".feed-item")) {
            Elements questionLink = element.select(".question_link");
            String href = questionLink.attr("href");

            Long questionId;
            try {
                questionId = Long.valueOf(href.substring(href.lastIndexOf("/") + 1));
            } catch (NumberFormatException e) {
                // No usable question link in this feed item; skip it rather than fail the page.
                logger.warn("skip feed item without numeric question id, href:{}", href);
                continue;
            }

            QuestionVO questionVO = new QuestionVO();
            questionVO.setId(questionId);
            questionVO.setTitle(questionLink.text());
            questionVO.setContent(element.select(".zh-summary").text());
            // The vote count of the item is stored in the comment_count field.
            questionVO.setComment_count(
                parseCount(element.select(".zm-item-vote-info").attr("data-votecount")));
            data.add(questionVO);
        }

        QuestionResultVO questionResultVO = new QuestionResultVO();
        questionResultVO.setPage(answerQueryVO.getPage());
        questionResultVO.setData(data);
        return questionResultVO;
    }

    /**
     * Queries the v4 answers API for a question and post-processes each answer's HTML:
     * images carrying {@code data-actualsrc} get that value as {@code src} with a fixed
     * width of 600px, and every anchor is opened in a new tab.
     *
     * @param answerQueryVO query carrying the question id
     * @return the parsed API response with rewritten answer content; whatever
     *         {@code JSON.parseObject} returns is passed through, including {@code null}
     * @throws Exception if the HTTP request fails
     */
    public AnswerResultVO queryAnswer(AnswerQueryVO answerQueryVO) throws Exception {
        String url = "https://www.zhihu.com/api/v4/questions/" + answerQueryVO.getQuestionId()
                     + "/answers?include=data%5B*%5D.is_normal%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Cmark_infos%2Ccreated_time%2Cupdated_time%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cupvoted_followees%3Bdata%5B*%5D.author.badge%5B%3F(type%3Dbest_answerer)%5D.topics&offset=&limit=10&sort_by=default";
        String content = fetch(url, QUESTION_URL_PREFIX + answerQueryVO.getQuestionId(),
            answerQueryVO);

        AnswerResultVO answerResultVO = JSON.parseObject(content, AnswerResultVO.class);
        if (answerResultVO == null || CollectionUtils.isEmpty(answerResultVO.getData())) {
            return answerResultVO;
        }

        for (AnswerVO answerVO : answerResultVO.getData()) {
            String html = answerVO.getContent();
            if (html == null) {
                // Jsoup.parse rejects null input; leave such answers untouched.
                continue;
            }
            Document document = Jsoup.parse(html);

            // Replace the image source with the original image.
            for (Element image : document.select("img[data-actualsrc]")) {
                image.attr("src", image.attr("data-actualsrc")).attr("width", "600px");
            }

            // Link handling: keep only what follows the first '=' of the href.
            // NOTE(review): ":contains" matches the anchor TEXT, not its href; if the goal is
            // to unwrap link.zhihu.com redirects, "a[href*=link.zhihu.com]" plus URL-decoding
            // of the target is probably what was intended - confirm before changing.
            for (Element link : document.select("a:contains(link.zhihu.com)")) {
                String href = link.attr("href");
                link.attr("href", href.substring(href.indexOf("=") + 1));
            }

            // Open every link (including the ones rewritten above) in a new tab.
            for (Element link : document.select("a")) {
                link.attr("target", "_blank");
            }
            answerVO.setContent(document.html());
        }
        return answerResultVO;
    }

    /**
     * Performs a GET request with the browser-like headers shared by all Zhihu calls,
     * logs a bounded preview of the response and returns the body decoded as UTF-8.
     *
     * @param url     absolute URL to fetch
     * @param referer value of the Referer header (a plain URL, without a header-name prefix)
     * @param params  query object, only used for logging
     * @return the response body
     * @throws Exception if the request fails or the response reports an error status
     */
    private String fetch(String url, String referer, AnswerQueryVO params) throws Exception {
        // NOTE(review): "sdch" and "br" are advertised although this client may not be
        // able to decode them - verify against the HttpClient version in use.
        Response response = Request.Get(url)
            .addHeader("User-Agent", USER_AGENT)
            .addHeader("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20")
            .addHeader("accept", "application/json, text/plain, */*")
            .addHeader("Accept-Encoding", "gzip, deflate, sdch, br")
            .addHeader("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
            .addHeader("x-udid", "AFDCK2rDmAuPTq_hQ0A_99M7oj-9oEMb-hI=")
            .addHeader("Connection", "keep-alive")
            .addHeader("Referer", referer)
            .addHeader("Cookie", COOKIE)
            .execute();
        String content = response.returnContent().asString(UTF_8);

        // Clamp the preview: a plain substring(0, 400) throws on bodies shorter than 400 chars.
        String preview = content.substring(0, Math.min(content.length(), LOG_PREVIEW_LENGTH));
        logger.info("pageQueryActivitySense url:{},params:{},result:{}", url, params, preview);
        return content;
    }

    /**
     * Parses a count attribute, treating a missing or non-numeric value as zero.
     *
     * @param raw attribute text, possibly empty
     * @return the parsed value, or 0 when {@code raw} is not a valid integer
     */
    private static Integer parseCount(String raw) {
        try {
            return Integer.valueOf(raw.trim());
        } catch (NumberFormatException e) {
            return 0;
        }
    }
}
